Revision: arch--devo--1.0--patch-64
Archive: lord@regexps.com--2002
Creator: Tom Lord <lord@regexps.com>
Date: Fri Dec 20 18:25:06 PST 2002
Standard-date: 2002-12-21 02:25:06 GMT
Summary: eliminated tabs in the shell scripts
Keywords: 
New-files: {arch}/arch/arch--devo/arch--devo--1.0/lord@regexps.com--2002/patch-log/patch-64
Modified-files: ChangeLog
  ChangeLog.d/geisler@ece.nwu.edu--public/ChangeLog.geisler--1.0
  ChangeLog.d/geisler@ece.nwu.edu--src/ChangeLog.geisler--1.0
  ChangeLog.d/lord@regexps.com--2002/ChangeLog.lord--1.0
  ChangeLog.d/lord@regexps.com--2002/ChangeLog.lord-doc--1.0
  arch/larch.sh.in archive-transactions/=former/commit.sh
  archive-transactions/arch-rmrf.sh
  archive-transactions/build-by-patching.sh
  archive-transactions/build-revision.sh
  archive-transactions/commit.sh
  archive-transactions/copy-from-archive-cache.sh
  archive-transactions/copy-from-fresh-start-base.sh
  archive-transactions/get-patch.sh
  archive-transactions/get.sh archive-transactions/import.sh
  archive-transactions/lock-branch.sh
  archive-transactions/lock-revision.sh
  archive-transactions/make-lock.sh
  archive-transactions/push-mirror.sh
  archive-transactions/putdir.sh
  archive-transactions/putlast.sh
  archive-transactions/wd-txn.sh
  archives/archive-cache-revision.sh
  archives/archive-cached-revisions.sh
  archives/archive-readme.sh
  archives/archive-uncache-revision.sh
  archives/branch-readme.sh archives/branches.sh
  archives/cat-archive-log.sh archives/categories.sh
  archives/category-readme.sh archives/get-archive-name.sh
  archives/make-archive.sh archives/make-branch.sh
  archives/make-category.sh archives/make-version.sh
  archives/noop.sh archives/previous-patch-level.sh
  archives/revisions.sh archives/version-readme.sh
  archives/versions.sh branching-and-merging/delta-patch.sh
  branching-and-merging/finish-branch.sh
  branching-and-merging/join-branch.sh
  branching-and-merging/make-sync-tree.sh
  branching-and-merging/prepare-branch.sh
  branching-and-merging/reconcile.sh
  branching-and-merging/replay.sh
  branching-and-merging/star-merge.sh
  branching-and-merging/tag.sh branching-and-merging/update.sh
  branching-and-merging/whats-missing.sh
  configurations/=later/what-changed-config.sh
  configurations/build-config.sh
  configurations/config-history.sh
  configurations/record-config.sh
  configurations/replay-config.sh
  configurations/show-config.sh
  configurations/update-config.sh input/file-syntax-filter.sh
  inventory/=later/file-tag.sh inventory/add.sh
  inventory/check-manifest.sh inventory/delete.sh
  inventory/explicit-default.sh inventory/inventory.sh
  inventory/manifest.sh inventory/missing-tags.sh
  inventory/move.sh inventory/set-manifest.sh
  inventory/tagging-method.sh inventory/tree-lint.sh
  library/cat-library-file.sh library/copy-from-library.sh
  library/file-history.sh library/library-add.sh
  library/library-archives.sh library/library-branches.sh
  library/library-categories.sh library/library-file.sh
  library/library-find.sh library/library-log.sh
  library/library-remove.sh library/library-revisions.sh
  library/library-versions.sh library/my-revision-library.sh
  library/touched-files-prereqs.sh local-cache/add-pristine.sh
  local-cache/cache-dir.sh local-cache/cached-index.sh
  local-cache/copy-from-cache.sh
  local-cache/delete-pristine.sh local-cache/file-diffs.sh
  local-cache/find-in-cache.sh local-cache/lock-pristine.sh
  local-cache/make-cache-temp-dir.sh local-cache/old-file.sh
  local-cache/pristines.sh local-cache/take-from-cache.sh
  local-cache/wd-mv-pristine.sh local-cache/what-changed.sh
  naming-conventions/indicated-config-file.sh
  naming-conventions/indicated-revision.sh
  naming-conventions/parse-package-name.sh
  naming-conventions/patch-level-lt.sh
  naming-conventions/sort-revisions.sh
  naming-conventions/valid-archive-location.sh
  naming-conventions/valid-archive-name.sh
  naming-conventions/valid-config-name.sh
  naming-conventions/valid-id.sh
  naming-conventions/valid-package-name.sh
  naming-conventions/valid-patch-level-name.sh
  notify/mail-new-branches.sh notify/mail-new-categories.sh
  notify/mail-new-revisions.sh notify/mail-new-versions.sh
  notify/my-notifier.sh notify/notify-browser.sh
  notify/notify-library.sh notify/notify.sh
  notify/push-new-revisions.sh notify/sendmail-mailx.sh
  output/body-indent.sh output/file-list.sh output/heading.sh
  output/nest.sh output/nested.sh output/top.sh
  patch-logs/add-log.sh patch-logs/cat-log.sh
  patch-logs/changelog.sh patch-logs/copy-to-patch-log.sh
  patch-logs/log-for-merge.sh patch-logs/log-header-field.sh
  patch-logs/log-ls.sh patch-logs/logs.sh
  patch-logs/make-log.sh patch-logs/merge-points.sh
  patch-logs/new-on-branch.sh patch-logs/remove-log.sh
  patch-logs/valid-log-file.sh
  patch-logs/wd-check-for-patch-log.sh
  patch-sets/=later/mkpatch2.sh patch-sets/dopatch.sh
  patch-sets/empty-patch.sh patch-sets/mkpatch.sh
  patch-sets/patch-add-log.sh patch-sets/patch-report.sh
  project-tree/copy-or-stash.sh
  project-tree/copy-tree-precious.sh project-tree/init-tree.sh
  project-tree/set-tree-version.sh project-tree/tree-repair.sh
  project-tree/tree-root.sh project-tree/tree-version.sh
  remote/with-archive-settings.sh remote/with-archive.sh
  remote/without-archive.sh user/archives.sh
  user/my-default-archive.sh user/my-id.sh
  user/register-archive.sh user/whereis-archive.sh
  web/distribution-name.sh web/make-library-browser.sh
  web/patch-details.sh web/patch-set-web.sh
  web/update-distributions.sh
New-patches: lord@regexps.com--2002/arch--devo--1.0--patch-64

For the benefit of users whose editors display tabs oddly,
I've eliminated most tabs from the shell script sources using the
enclosed quick-and-dirty program:

/* fix-sh-tabs.c: eliminate tabs in shell scripts
 *
 ****************************************************************
 * Copyright (C) 2002  Tom Lord
 * 
 * See the file "COPYING" for further information about
 * the copyright and warranty status of this work.
 */


#include "config-options.h"
#include "hackerlab/cmd/main.h"



/* enum unquoted_context: tells fix_unquoted how it was entered, which
 * in turn decides which character (if any) makes it return early.
 */
enum unquoted_context
{
  /* Top-level script text (what main passes).  None of the closing
   * characters below is special; fix_unquoted runs until end of input.
   */
  normal_unquoted = 0,

  /* Entered from fix_quoted at a backquote.  fix_unquoted emits the
   * opening backquote itself and returns after the next backquote.
   */
  double_quoted_backquote_unquoted = 1,

  /* Entered after `$('.  fix_unquoted returns after the next `)'. */
  nestable_command_unquoted = 2,

  /* fix_unquoted returns after the next `}' in this context.  No call
   * visible in this file passes this value.
   */
  nestable_var_expansion_unquoted = 3
};



/* __STDC__ prototypes for static functions
 *
 * Every function that returns `int' returns the output column reached
 * after the text it copied; each takes the current column as `col'.
 */
static int replace_tab (int out_fd, int col);
static int keep_tab (int out_fd, int col);
static int fix_unquoted (int in_fd, int out_fd, int col, enum unquoted_context context);
static int fix_quoted (int in_fd, int out_fd, int col, t_uchar quote);
static void fix_comment (int in_fd, int out_fd, int col);
static int fix_nestable_var_expansion (int in_fd, int out_fd, int col);



/* Identification strings.  All three are handed to opt_standard by
 * main; program_name and usage are also handed to opt_usage.
 */

/* Name of this program. */
static t_uchar * program_name = "fix-sh-tabs";

/* One-line usage synopsis. */
static t_uchar * usage = "fix-sh-tabs [options]";

/* Version banner.  It begins with the package identifier
 * cfg__std__package (not defined in this file; presumably supplied
 * by "config-options.h" -- TODO confirm).
 */
static t_uchar * version_string = (cfg__std__package " from regexps.com\n"
				   "\n"
				   "Copyright 2002 Tom Lord\n"
				   "\n"
				   "This is free software; see the source for copying conditions.\n"
				   "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A\n"
				   "PARTICULAR PURPOSE.\n"
				   "\n"
				   "Report bugs to <lord@regexps.com>.\n"
				   "\n");


/* OPTS: the list of command-line options, written as a list macro.
 *
 * Each OP entry supplies an option identifier, a short option name,
 * a long option name (or 0), one further field that is 0 for every
 * option here, and a help string.  An OP2 entry carries an extra
 * line of help text for the option named in the entry before it.
 *
 * The list is expanded twice in this file: with OPT_ENUM/OPT_IGN to
 * build `enum options', and with OPT_DESC/OPT_DESC to build `opts'.
 * Those expander macros are defined outside this file, so the exact
 * meaning of each field is presumed from the arguments -- TODO
 * confirm against the option-parsing header.
 */
#define OPTS(OP, OP2) \
  OP (opt_help_msg, "h", "help", 0, \
      "Display a help message and exit.") \
  OP (opt_long_help, "H", 0, 0, \
      "Display a verbose help message and exit.") \
  OP (opt_version, "V", "version", 0, \
      "Display a release identifier string") \
  OP2 (opt_version, 0, 0, 0, "and exit.")

/* Verbose help text, passed to opt_standard by main.
 * (Spelling of "standard output" corrected.)
 */
static t_uchar long_help[] = ("Turn tabs into spaces, presuming the input is a /bin/sh script.\n"
                              "Lines from standard input are copied to standard output,\n"
                              "eliminating all tabs except for non-leading tabs in quoted text.\n"
                              "\n");

/* Option identifiers (opt_help_msg, opt_long_help, opt_version),
 * generated from the OPTS list.  OP2 entries expand through OPT_IGN
 * here, so presumably they add no enumerator -- TODO confirm.
 */
enum options
{
  OPTS (OPT_ENUM, OPT_IGN)  
};

/* Option description table passed to opt_standard by main, generated
 * from the same OPTS list and closed by a {-1, 0, 0, 0, 0} entry.
 */
struct opt_desc opts[] = 
{
  OPTS (OPT_DESC, OPT_DESC)
    {-1, 0, 0, 0, 0}
};



/* TABWIDTH: distance between tab stops, in columns.  A tab advances
 * the column to the next multiple of TABWIDTH (see replace_tab and
 * keep_tab).
 *
 * This is a textual hook for future generalization.
 */
#define TABWIDTH	(8)



/* main: parse the standard options, then filter standard input
 * (descriptor 0) to standard output (descriptor 1) as top-level,
 * unquoted shell text starting at column 0.
 *
 * Always returns 0 when the filter runs to end of input.
 *
 * The unused locals `errn' and `line_width' and the never-targeted
 * labels `usage_error' and `bogus_arg' of the earlier version are
 * gone; the statements that could still be reached are kept.
 */
int
main (int argc, char * argv[])
{
  int o;
  struct opt_parsed * option;

  option = 0;

  safe_buffer_fd (0, 0, O_RDONLY, 0);
  safe_buffer_fd (1, 0, O_WRONLY, 0);

  while (1)
    {
      o = opt_standard (lim_use_must_malloc, &option, opts, &argc, argv, program_name, usage, version_string, long_help, opt_help_msg, opt_long_help, opt_version);
      if (o == opt_none)
        break;

      switch (o)
        {
        default:
          /* This program defines no options beyond the ones given to
           * opt_standard (presumably handled there -- TODO confirm),
           * so any other result is an internal error.
           */
          safe_printfmt (2, "unhandled option `%s'\n", option->opt_string);
          panic ("internal error parsing arguments");

          /* Only reached if panic returns. */
          opt_usage (2, argv[0], program_name, usage, 1);
          exit (1);
        }
    }

  (void)fix_unquoted (0, 1, 0, normal_unquoted);

  return 0;
}



/* replace_tab: emit the spaces that stand in for one tab.
 *
 * Writes single spaces to OUT_FD until the column reaches the next
 * multiple of TABWIDTH after COL, and returns that column.
 */
static int
replace_tab (int out_fd, int col)
{
  int next_stop;

  next_stop = col + (TABWIDTH - (col % TABWIDTH));

  while (col < next_stop)
    {
      safe_write (out_fd, " ", 1);
      ++col;
    }

  return next_stop;
}


/* keep_tab: copy one tab to OUT_FD unchanged.
 *
 * Returns the column the tab advances to: the next multiple of
 * TABWIDTH after COL.
 */
static int
keep_tab (int out_fd, int col)
{
  safe_write (out_fd, "\t", 1);
  return col + (TABWIDTH - (col % TABWIDTH));
}


/* fix_unquoted: copy unquoted shell text from IN_FD to OUT_FD,
 * replacing every unescaped tab with spaces.
 *
 * COL is the output column on entry; the return value is the output
 * column after the last byte written.
 *
 * CONTEXT selects the character that ends this stretch of text:
 *
 *   normal_unquoted                   nothing; runs to end of input
 *   double_quoted_backquote_unquoted  the next backquote (the opening
 *                                     backquote is written here)
 *   nestable_command_unquoted         the next `)'
 *   nestable_var_expansion_unquoted   the next `}'
 *
 * In each case the closing character is written before returning.
 * The function also returns when input is exhausted (the loop treats
 * a zero result from safe_read as end of input).
 *
 * Quoted strings, comments, `${...}' and `$(...)' are handed to the
 * matching helper.  A backslash and the byte after it are copied
 * untouched, even when that byte is a tab.
 */
static int
fix_unquoted (int in_fd, int out_fd, int col, enum unquoted_context context)
{
  t_uchar c;

  if (context == double_quoted_backquote_unquoted)
    {
      safe_write (out_fd, "`", 1);
      col += 1;
    }

  while (safe_read (in_fd, (char *)&c, 1))
    {
    dispatch:
      switch (c)
        {
        default:
        default_case:
          {
            safe_write (out_fd, (char *)&c, 1);
            col += 1;
            break;
          }

        case '\t':
          {
            col = replace_tab (out_fd, col);
            break;
          }

        case '\n':
          {
            safe_write (out_fd, "\n", 1);
            col = 0;
            break;
          }

        case '#':
          {
            /* fix_comment copies through the end of the line, so the
             * next byte is at column 0.
             */
            fix_comment (in_fd, out_fd, col);
            col = 0;
            break;
          }

        case '\'':
        case '"':
          {
            col = fix_quoted (in_fd, out_fd, col, c);
            break;
          }

        case '`':
          {
            if (context != double_quoted_backquote_unquoted)
              goto default_case;
            else
              {
                safe_write (out_fd, (char *)&c, 1);
                col += 1;
                return col;
              }
          }

        case '$':
          {
            if (!safe_read (in_fd, (char *)&c, 1))
              {
                /* `$' was the last byte of input: write it once.
                 * (Previously the stale `$' in `c' was written a
                 * second time.)
                 */
                safe_write (out_fd, "$", 1);
                return col + 1;
              }

            switch (c)
              {
              case '{':
                col = fix_nestable_var_expansion (in_fd, out_fd, col);
                break;

              case '(':
                safe_write (out_fd, "$(", 2);
                col += 2;
                col = fix_unquoted (in_fd, out_fd, col, nestable_command_unquoted);
                break;

              case '$':
              case '#':
                /* `$$' and `$#' are special parameters.  The second
                 * character must not be read as the start of another
                 * expansion or of a comment.
                 */
                safe_write (out_fd, "$", 1);
                safe_write (out_fd, (char *)&c, 1);
                col += 2;
                break;

              default:
                /* Anything else is processed as an ordinary
                 * character, so that a newline resets the column, a
                 * tab is replaced, and a quote, backslash or closing
                 * delimiter right after `$' is not overlooked.
                 */
                safe_write (out_fd, "$", 1);
                col += 1;
                goto dispatch;
              }
            break;
          }

        case ')':
          {
            if (context != nestable_command_unquoted)
              goto default_case;
            else
              {
                safe_write (out_fd, ")", 1);
                col += 1;
                return col;
              }
          }

        case '}':
          {
            if (context != nestable_var_expansion_unquoted)
              goto default_case;
            else
              {
                safe_write (out_fd, "}", 1);
                col += 1;
                return col;
              }
          }

        case '\\':
          {
            safe_write (out_fd, "\\", 1);
            ++col;

            /* Input ended right after the backslash.  This used to be
             * a bare `return;' in a function that returns int.
             */
            if (!safe_read (in_fd, (char *)&c, 1))
              return col;

            safe_write (out_fd, (char *)&c, 1);
            if (c == '\n')
              col = 0;
            else if (c == '\t')
              col += TABWIDTH - (col % TABWIDTH);
            else
              ++col;

            break;
          }
        }
    }
  return col;
}


/* fix_quoted: copy a quoted string from IN_FD to OUT_FD.
 *
 * QUOTE is the opening quote character (single or double quote),
 * which the caller has already read; it is written here.  COL is the
 * output column on entry.  Returns the output column after the
 * closing quote, or after the last byte written if input ends first.
 *
 * Only leading tabs (tabs preceded on their line by nothing but
 * spaces and tabs) are replaced by spaces; other tabs are kept.
 */
static int
fix_quoted (int in_fd, int out_fd, int col, t_uchar quote)
{
  t_uchar c;
  int is_leading;

  /* 1003.2 3.2.2 ("Single Quotes")
   *
   * "Enclosing characters in single quotes (' ') shall preserve the literal
   * value of each character within the single quotes.  A single quote cannot
   * occur within single quotes."
   *
   * 1003.2 3.2.3 ("Double Quotes"):
   *
   * "[....] The backslash shall retain its special meaning as an escape character
   * (see 3.2.1) only when followed by one of the characters:
   *            $ ` " \ <newline>
   * [....]"
   *
   * So, which tabs do we replace?
   *
   * There's no perfect answer, but here's a pretty good heuristic:
   *
   * Leading tabs on normal lines should be changed to spaces.
   *
   * Non-leading tabs should not be altered.  They may be part of a
   * regexp, for example.
   *
   * Text following a backslash-newline is not considered leading text.
   *
   * Single quoted strings are terminated by any single quote.
   *
   * Double quoted strings are terminated by any double quote that is not
   * preceded by a backslash.
   */

  is_leading = 0;
  safe_write (out_fd, (char *)&quote, 1); /* write the opening quote character */
  ++col;

  while (safe_read (in_fd, (char *)&c, 1))
    {
    dispatch:
      switch (c)
        {
        default:
        default_case:
          {
            safe_write (out_fd, (char *)&c, 1);
            col += 1;
            is_leading = 0;
            break;
          }

        case ' ':
          {
            /* Spaces do not end the run of leading whitespace. */
            safe_write (out_fd, (char *)&c, 1);
            col += 1;
            break;
          }

        case '\t':
          {
            if (is_leading)
              {
                col = replace_tab (out_fd, col);
              }
            else
              {
                col = keep_tab (out_fd, col);
              }
            break;
          }

        case '\n':
          {
            safe_write (out_fd, "\n", 1);
            col = 0;
            is_leading = 1;
            break;
          }

        case '\'':
        case '\"':
          {
            if (c == quote)
              {
                safe_write (out_fd, (char *)&c, 1);
                ++col;
                return col;
              }
            else
              goto default_case;
          }

        case '$':
          {
            /* `$' is an ordinary character inside single quotes. */
            if (quote == '\'')
              goto default_case;

            /* Whatever follows an expansion is not leading text. */
            is_leading = 0;

            if (!safe_read (in_fd, (char *)&c, 1))
              {
                /* `$' was the last byte of input: write it once. */
                safe_write (out_fd, "$", 1);
                return col + 1;
              }

            switch (c)
              {
              case '{':
                col = fix_nestable_var_expansion (in_fd, out_fd, col);
                break;

              case '(':
                safe_write (out_fd, "$(", 2);
                col += 2;
                col = fix_unquoted (in_fd, out_fd, col, nestable_command_unquoted);
                break;

              case '$':
                /* `$$' is a special parameter; the second `$' does
                 * not start another expansion.
                 */
                safe_write (out_fd, "$$", 2);
                col += 2;
                break;

              default:
                /* Anything else is processed as an ordinary
                 * character.  In particular, the closing quote of a
                 * string such as "^foo$" is recognized instead of
                 * being copied blindly.
                 */
                safe_write (out_fd, "$", 1);
                col += 1;
                goto dispatch;
              }
            break;
          }

        case '`':
          {
            /* 1003.2 3.2.3
             *
             * "The portion of the quoted string from the initial backquote and
             * the characters up to the next backquote that is not preceded by
             * a backslash [....] defines that command whose output replaces
             * `....`."
             *
             * Unbalanced quotes produce undefined behavior.
             *
             * Therefore, for untabifying, a backquote inside double
             * quotes starts unquoted text that runs to the next
             * backquote; fix_unquoted writes both backquotes.  Note
             * that fix_unquoted does not special-case quote
             * characters in that context.
             *
             * Inside single quotes a backquote is an ordinary
             * character.  This test must look at `quote': `c' is
             * always a backquote here.
             */

            if (quote == '\'')
              goto default_case;
            else
              {
                col = fix_unquoted (in_fd, out_fd, col, double_quoted_backquote_unquoted);
                is_leading = 0;
                break;
              }
          }

        case '\\':
          {
            safe_write (out_fd, "\\", 1);
            ++col;

            if (!safe_read (in_fd, (char *)&c, 1))
              return col;

            safe_write (out_fd, (char *)&c, 1);

            if (c == '\n')
              col = 0;
            else if (c == '\t')
              col += TABWIDTH - (col % TABWIDTH);
            else
              ++col;

            is_leading = 0;

            if ((quote == '\'') && (c == '\''))
              {
                /* single-quote terminates a single-quoted string even
                 * when preceded by backslash.
                 */
                return col;
              }

            break;
          }
        }
    }

  /* Input ended inside the string.  Control used to fall off the end
   * of the function here without returning a value.
   */
  return col;
}

/* fix_comment: copy a `#' comment through the end of its line.
 *
 * The caller has already read the `#'; it is written here.  Every
 * tab in the comment is replaced with spaces, with COL tracking the
 * output column for that purpose.  Returns after writing the newline
 * that ends the comment, or when input runs out.
 */
static void
fix_comment (int in_fd, int out_fd, int col)
{
  t_uchar ch;

  safe_write (out_fd, "#", 1);
  col += 1;

  for (;;)
    {
      if (!safe_read (in_fd, (char *)&ch, 1))
        return;

      if (ch == '\n')
        {
          safe_write (out_fd, "\n", 1);
          return;
        }

      if (ch == '\t')
        {
          col = replace_tab (out_fd, col);
        }
      else
        {
          safe_write (out_fd, (char *)&ch, 1);
          col += 1;
        }
    }
}

/* fix_nestable_var_expansion: copy a `${...}' expansion.
 *
 * The caller has already read the `${'; it is written here.  Text is
 * copied from IN_FD to OUT_FD up to and including the matching `}'.
 * Nested `${...}' and `$(...)' are handled recursively.
 *
 * COL is the output column on entry.  Returns the output column
 * after the closing `}', or after the last byte written if input
 * ends first.
 *
 * Limitations: quote characters and backslashes are copied as
 * ordinary characters, so a `}' inside them still ends the
 * expansion.  Tabs are copied unchanged and counted as one column.
 */
static int
fix_nestable_var_expansion (int in_fd, int out_fd, int col)
{
  t_uchar c;

  safe_write (out_fd, "${", 2);
  col += 2;

  while (safe_read (in_fd, (char *)&c, 1))
    {
    dispatch:
      switch (c)
        {
        default:
          safe_write (out_fd, (char *)&c, 1);
          col += 1;
          break;

        case '\n':
          /* A newline starts a new output line. */
          safe_write (out_fd, (char *)&c, 1);
          col = 0;
          break;

        case '}':
          safe_write (out_fd, (char *)&c, 1);
          col += 1;
          return col;

        case '$':
          {
            if (!safe_read (in_fd, (char *)&c, 1))
              {
                /* `$' was the last byte of input: write it once. */
                safe_write (out_fd, "$", 1);
                return col + 1;
              }

            switch (c)
              {
              case '{':
                col = fix_nestable_var_expansion (in_fd, out_fd, col);
                break;

              case '(':
                safe_write (out_fd, "$(", 2);
                col += 2;
                col = fix_unquoted (in_fd, out_fd, col, nestable_command_unquoted);
                break;

              case '$':
                /* `$$' is a special parameter; the second `$' does
                 * not start another expansion.
                 */
                safe_write (out_fd, "$$", 2);
                col += 2;
                break;

              default:
                /* Anything else is processed as an ordinary
                 * character, so that a `}' right after `$' still
                 * closes the expansion.
                 */
                safe_write (out_fd, "$", 1);
                col += 1;
                goto dispatch;
              }
            break;
          }
        }
    }

  /* Input ended inside the expansion.  Control used to fall off the
   * end of the function here without returning a value.
   */
  return col;
}



